From: Keir Fraser Date: Mon, 28 Jan 2008 10:17:05 +0000 (+0000) Subject: x86-64: use 1GB pages in 1:1 mapping if available X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14368 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22Dat/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22Dat?a=commitdiff_plain;h=4376c05c311320beb528213b1ae3bb0371e2b6c8;p=xen.git x86-64: use 1GB pages in 1:1 mapping if available At once adjust the 2/4Mb page handling slightly in a few places (to match the newly added code): - when re-creating a large page mapping after finding that all small page mappings in the respective area are using identical flags and suitable MFNs, the virtual address was already incremented past the area to be dealt with, which needs to be accounted for in the invocation of flush_area() in that path - don't or-in/and-out _PAGE_PSE on non-present pages - when comparing flags, try to minimise the number of l1f_to_lNf()/ lNf_to_l1f() instances used - instead of skipping a single page when encountering a big page mapping equal to what a small page mapping would establish, skip to the next larger page boundary Signed-off-by: Jan Beulich --- diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 177ba93a7a..0b59d7a3c2 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -3684,8 +3684,9 @@ void free_xen_pagetable(void *v) { extern int early_boot; - BUG_ON(early_boot); - + if ( early_boot ) + return; + if ( is_xen_heap_page(virt_to_page(v)) ) free_xenheap_page(v); else @@ -3693,8 +3694,8 @@ void free_xen_pagetable(void *v) } /* Convert to from superpage-mapping flags for map_pages_to_xen(). */ -#define l1f_to_l2f(f) ((f) | _PAGE_PSE) -#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE) +#define l1f_to_lNf(f) (((f) & _PAGE_PRESENT) ? ((f) | _PAGE_PSE) : (f)) +#define lNf_to_l1f(f) (((f) & _PAGE_PRESENT) ? 
((f) & ~_PAGE_PSE) : (f)) /* * map_pages_to_xen() can be called with interrupts disabled: @@ -3720,6 +3721,127 @@ int map_pages_to_xen( while ( nr_mfns != 0 ) { +#ifdef __x86_64__ + l3_pgentry_t *pl3e = virt_to_xen_l3e(virt); + l3_pgentry_t ol3e = *pl3e; + + if ( cpu_has_page1gb && + !(((virt >> PAGE_SHIFT) | mfn) & + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) && + nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) && + !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) ) + { + /* 1GB-page mapping. */ + l3e_write_atomic(pl3e, l3e_from_pfn(mfn, l1f_to_lNf(flags))); + + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) ) + { + unsigned int flush_flags = + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); + + if ( l3e_get_flags(ol3e) & _PAGE_PSE ) + { + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + if ( (l1f_to_lNf(l3e_get_flags(ol3e)) ^ flags) & + PAGE_CACHE_ATTRS ) + flush_flags |= FLUSH_CACHE; + flush_area(virt, flush_flags); + } + else + { + pl2e = l3e_to_l2e(ol3e); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + { + ol2e = pl2e[i]; + if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) + continue; + if ( l2e_get_flags(ol2e) & _PAGE_PSE ) + { + if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) & + PAGE_CACHE_ATTRS ) + flush_flags |= FLUSH_CACHE; + } + else + { + unsigned int j; + + pl1e = l2e_to_l1e(ol2e); + for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ ) + { + ol1e = pl1e[j]; + if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + if ( (l1e_get_flags(ol1e) ^ flags) & + PAGE_CACHE_ATTRS ) + flush_flags |= FLUSH_CACHE; + } + } + } + flush_area(virt, flush_flags); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + { + ol2e = pl2e[i]; + if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) && + !(l2e_get_flags(ol2e) & _PAGE_PSE) ) + free_xen_pagetable(l2e_to_l1e(ol2e)); + } + free_xen_pagetable(pl2e); + } + } + + virt += 1UL << L3_PAGETABLE_SHIFT; + mfn += 1UL << 
(L3_PAGETABLE_SHIFT - PAGE_SHIFT); + nr_mfns -= 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + + if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) && + (l3e_get_flags(ol3e) & _PAGE_PSE) ) + { + unsigned int flush_flags = + FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); + + /* Skip this PTE if there is no change. */ + if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES * + L1_PAGETABLE_ENTRIES - 1)) + + (l2_table_offset(virt) << PAGETABLE_ORDER) + + l1_table_offset(virt) == mfn) && + ((lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) & + ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0 ) + { + /* We can skip to end of L3 superpage if we got a match. */ + i = (1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - + (mfn & ((1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)); + if ( i > nr_mfns ) + i = nr_mfns; + virt += i << PAGE_SHIFT; + mfn += i; + nr_mfns -= i; + continue; + } + + pl2e = alloc_xen_pagetable(); + if ( pl2e == NULL ) + return -ENOMEM; + + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + l2e_write(pl2e + i, + l2e_from_pfn(l3e_get_pfn(ol3e) + + (i << PAGETABLE_ORDER), + l3e_get_flags(ol3e))); + + if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) + flush_flags |= FLUSH_TLB_GLOBAL; + + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), + __PAGE_HYPERVISOR)); + flush_area(virt, flush_flags); + } +#endif + pl2e = virt_to_xen_l2e(virt); if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1< nr_mfns ) + i = nr_mfns; + virt += i << L1_PAGETABLE_SHIFT; + mfn += i; + nr_mfns -= i; + goto check_l3; } pl1e = alloc_xen_pagetable(); @@ -3800,7 +3927,7 @@ int map_pages_to_xen( for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) l1e_write(&pl1e[i], l1e_from_pfn(l2e_get_pfn(*pl2e) + i, - l2f_to_l1f(l2e_get_flags(*pl2e)))); + lNf_to_l1f(l2e_get_flags(*pl2e)))); if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL ) flush_flags |= FLUSH_TLB_GLOBAL; @@ -3843,13 +3970,45 @@ int map_pages_to_xen( { ol2e = *pl2e; l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn, - l1f_to_l2f(flags))); - flush_area(virt, (FLUSH_TLB_GLOBAL | - 
FLUSH_ORDER(PAGETABLE_ORDER))); + l1f_to_lNf(flags))); + flush_area(virt - PAGE_SIZE, + FLUSH_TLB_GLOBAL | + FLUSH_ORDER(PAGETABLE_ORDER)); free_xen_pagetable(l2e_to_l1e(ol2e)); } } } + + check_l3: ; +#ifdef __x86_64__ + if ( cpu_has_page1gb && + (flags == PAGE_HYPERVISOR) && + ((nr_mfns == 0) || + !(((virt >> PAGE_SHIFT) | mfn) & + ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))) ) + { + unsigned long base_mfn; + + ol3e = *pl3e; + pl2e = l3e_to_l2e(ol3e); + base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES * + L1_PAGETABLE_ENTRIES - 1); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) + if ( (l2e_get_pfn(*pl2e) != + (base_mfn + (i << PAGETABLE_ORDER))) || + (l2e_get_flags(*pl2e) != l1f_to_lNf(flags)) ) + break; + if ( i == L2_PAGETABLE_ENTRIES ) + { + l3e_write_atomic(pl3e, l3e_from_pfn(base_mfn, + l1f_to_lNf(flags))); + flush_area(virt - PAGE_SIZE, + FLUSH_TLB_GLOBAL | + FLUSH_ORDER(2*PAGETABLE_ORDER)); + free_xen_pagetable(l3e_to_l2e(ol3e)); + } + } +#endif } return 0; @@ -3867,6 +4026,40 @@ void destroy_xen_mappings(unsigned long s, unsigned long e) while ( v < e ) { +#ifdef __x86_64__ + l3_pgentry_t *pl3e = virt_to_xen_l3e(v); + + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) + { + v += 1UL << L3_PAGETABLE_SHIFT; + v &= ~((1UL << L3_PAGETABLE_SHIFT) - 1); + continue; + } + + if ( l3e_get_flags(*pl3e) & _PAGE_PSE ) + { + if ( l2_table_offset(v) == 0 && + l1_table_offset(v) == 0 && + ((e - v) >= (1UL << L3_PAGETABLE_SHIFT)) ) + { + /* PAGE1GB: whole superpage is destroyed. */ + l3e_write_atomic(pl3e, l3e_empty()); + v += 1UL << L3_PAGETABLE_SHIFT; + continue; + } + + /* PAGE1GB: shatter the superpage and fall through. 
*/ + pl2e = alloc_xen_pagetable(); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + l2e_write(pl2e + i, + l2e_from_pfn(l3e_get_pfn(*pl3e) + + (i << PAGETABLE_ORDER), + l3e_get_flags(*pl3e))); + l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), + __PAGE_HYPERVISOR)); + } +#endif + pl2e = virt_to_xen_l2e(v); if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) @@ -3919,6 +4112,23 @@ void destroy_xen_mappings(unsigned long s, unsigned long e) free_xen_pagetable(pl1e); } } + +#ifdef __x86_64__ + /* If we are done with the L3E, check if it is now empty. */ + if ( (v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0) ) + continue; + pl2e = l3e_to_l2e(*pl3e); + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + if ( l2e_get_intpte(pl2e[i]) != 0 ) + break; + if ( i == L2_PAGETABLE_ENTRIES ) + { + /* Empty: zap the L3E and free the L2 page. */ + l3e_write_atomic(pl3e, l3e_empty()); + flush_area(NULL, FLUSH_TLB_GLOBAL); /* flush before free */ + free_xen_pagetable(pl2e); + } +#endif } flush_area(NULL, FLUSH_TLB_GLOBAL); diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index f748efb489..383a868225 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -674,8 +674,9 @@ void __init __start_xen(unsigned long mbi_p) pl3e = l4e_to_l3e(*pl4e); for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ ) { - /* Not present or already relocated? */ + /* Not present, 1GB mapping, or already relocated? 
*/ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) || + (l3e_get_flags(*pl3e) & _PAGE_PSE) || (l3e_get_pfn(*pl3e) > 0x1000) ) continue; *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) + diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 5d9490a4fe..f9f33e0a88 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -69,30 +69,35 @@ void *alloc_xen_pagetable(void) return mfn_to_virt(mfn); } -l2_pgentry_t *virt_to_xen_l2e(unsigned long v) +l3_pgentry_t *virt_to_xen_l3e(unsigned long v) { l4_pgentry_t *pl4e; - l3_pgentry_t *pl3e; - l2_pgentry_t *pl2e; pl4e = &idle_pg_table[l4_table_offset(v)]; if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) { - pl3e = alloc_xen_pagetable(); + l3_pgentry_t *pl3e = alloc_xen_pagetable(); clear_page(pl3e); l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); } - pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v); + return l4e_to_l3e(*pl4e) + l3_table_offset(v); +} + +l2_pgentry_t *virt_to_xen_l2e(unsigned long v) +{ + l3_pgentry_t *pl3e; + + pl3e = virt_to_xen_l3e(v); if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) { - pl2e = alloc_xen_pagetable(); + l2_pgentry_t *pl2e = alloc_xen_pagetable(); clear_page(pl2e); l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); } - - pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); - return pl2e; + + BUG_ON(l3e_get_flags(*pl3e) & _PAGE_PSE); + return l3e_to_l2e(*pl3e) + l2_table_offset(v); } void __init paging_init(void) diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index ef6ecd4225..71e8fbe210 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -146,9 +146,11 @@ void show_page_walk(unsigned long addr) l3e = l3t[l3_table_offset(addr)]; mfn = l3e_get_pfn(l3e); pfn = mfn_valid(mfn) ? 
get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY; - printk(" L3[0x%03lx] = %"PRIpte" %016lx\n", - l3_table_offset(addr), l3e_get_intpte(l3e), pfn); - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n", + l3_table_offset(addr), l3e_get_intpte(l3e), pfn, + (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : ""); + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || + (l3e_get_flags(l3e) & _PAGE_PSE) ) return; l2t = mfn_to_virt(mfn); diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index b20ddb26dd..ac92b0c80e 100644 --- a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -350,6 +350,9 @@ static inline int get_order_from_pages(unsigned long nr_pages) void *alloc_xen_pagetable(void); void free_xen_pagetable(void *v); l2_pgentry_t *virt_to_xen_l2e(unsigned long v); +#ifdef __x86_64__ +l3_pgentry_t *virt_to_xen_l3e(unsigned long v); +#endif /* Map machine page range in Xen virtual address space. */ #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */